### Analysis of estimated force multipliers, using base model estimates

date()
library("tidyverse")
library("assertr")
library("foreach")
library("Formula")
library("maxLik")
library("xtable")
sessionInfo()


load("kr_analysis_participant.rda")
load("kr_state_year.rda")
load("results/fit_base.rda")


## Given a model fit and state-level data, calculate the force overall multipliers and their component parts
calc_mult <- function(data_part, fit) {
    ## Average participant-level covariates across imputations
    ##
    ## Doing this so that the overall force multiplier we report is exactly the
    ## product of the individual ones
    data_part_avg <- data_part %>%
        bind_rows() %>%
        group_by(id, ccode) %>%
        summarise_if(is.numeric, mean) %>%
        ungroup() %>%
        left_join(data_part[[1]] %>% select(id, ccode, stateabb, statenme),
                  by = c("id", "ccode")) %>%
        assert(not_na, -ends_with("lag_pure")) %>%
        select(id, ccode, stateabb, statenme, everything())

    ## Extract relevant design matrices for calculating force multipliers
    f_part <- update(fit[[1]]$f_participant, id ~ . - 1 | . - 1)
    mf_part <- model.frame(formula = f_part,
                           data = data_part_avg,
                           xlev = fit[[1]]$xlev_participant)
    X <- model.matrix(f_part, data = mf_part, rhs = 1)
    Z <- model.matrix(f_part, data = mf_part, rhs = 2)
    colnames(X) <- paste("beta", colnames(X), sep = ":")
    colnames(Z) <- paste("gamma", colnames(Z), sep = ":")

    ## Take average coefficients across imputations
    cf <- fit %>%
        map(pluck("fit")) %>%
        map(coef) %>%
        do.call("rbind", .) %>%
        colMeans()
    stopifnot(all(colnames(X) %in% names(cf)))
    stopifnot(all(colnames(Z) %in% names(cf)))
    beta <- cf[colnames(X)]
    gamma <- cf[colnames(Z)]

    ## Calculate individual components of force multipliers
    X_mult <- exp(sweep(X, MARGIN = 2, STATS = beta, FUN = "*"))
    Z_mult <- exp(sweep(-1 * Z, MARGIN = 2, STATS = gamma, FUN = "*"))
    dat_mult <- cbind(X_mult, Z_mult)
    dat_mult <- cbind(dat_mult, "multiplier" = apply(dat_mult, 1, prod))
    dat_mult <- as_tibble(dat_mult)
    names(dat_mult) <- names(dat_mult) %>%
        str_replace("(beta|gamma):", "") %>%
        str_replace("(log|log1p)\\(", "") %>%
        str_replace("\\)", "")
    dat_mult <- bind_cols(select(data_part_avg, id:year), dat_mult) %>%
        select(id:year, multiplier, everything())

    ## Calculate grouped components
    dat_mult <- dat_mult %>%
        mutate(economic = gdp_pwt * irst * pec * pct_imports,
               demographic = tpop * upop,
               geopolitical = distance * nuclear,
               political = polity2) %>%
        verify(all.equal(multiplier, economic * demographic * geopolitical * political))

    dat_mult
}


## -----------------------------------------------------------------------------
## Multipliers for observed dispute participants
## -----------------------------------------------------------------------------

## Calculate multipliers for MID participants within sample
mult_all <- calc_mult(data_part = data_participant, fit = fit_base) %>%
  mutate(statenme = if_else(statenme == "United States of America", "United States", statenme),
         statenme = if_else(statenme == "St. Vincent and the Grenadines", "St.\\ Vincent", statenme),
         statenme = str_replace(statenme, "&", "and"))

## Table of top and bottom overall multipliers
tab_mult <- mult_all %>%
    arrange(desc(multiplier)) %>%
    mutate(n = row_number()) %>%
    slice(1:10, (n()-9):n()) %>%
    mutate_at(vars(year, id), ~ sprintf("%d", .)) %>%
    mutate_at(vars(multiplier:political), ~ sprintf("%.2f", .)) %>%
    transmute("~" = n,
              Country = statenme,
              Year = year,
              "MID\\#" = id,
              Multiplier = multiplier,
              Demographic = demographic,
              Economic = economic,
              Political = political,
              Geopolitical = geopolitical)

## Extract model coefficients to put in individual multiplier tables
cf_base <- fit_base %>%
  map("fit") %>%
  map(coef) %>%
  reduce(`+`)
cf_base <- cf_base / length(fit_base)
cf_names <- names(cf_base)
cf_prefix <- str_replace(cf_names, "([a-z]*):.*", "\\1")
cf_base <- if_else(cf_prefix == "gamma", -1 * cf_base, cf_base)
cf_base <- formatC(cf_base, digits = 2, format = "f")
cf_base <- str_glue("$\\beta = {cf_base}$")
names(cf_base) <- cf_names

## Table of top and bottom economic multipliers
tab_econ <- mult_all %>%
  arrange(desc(economic)) %>%
  mutate(n = row_number()) %>%
  slice(1:5, (n()-4):n()) %>%
  mutate_at(vars(year, id), ~ sprintf("%d", .)) %>%
  mutate_at(vars(multiplier:political), ~ sprintf("%.2f", .)) %>%
  transmute("~" = n,
            Country = statenme,
            Year = year,
            "MID\\#" = id,
            Economic = economic,
            GDP = gdp_pwt,
            "Energy" = pec,
            "Iron and Steel" = irst,
            "Imports" = pct_imports) %>%
  add_row(GDP = cf_base["beta:log(gdp_pwt)"],
          Energy = cf_base["beta:log1p(pec)"],
          `Iron and Steel` = cf_base["beta:log1p(irst)"],
          Imports = cf_base["gamma:log1p(pct_imports)"],
          .after = 0)

## Table of top and bottom demographic multipliers
tab_demo <- mult_all %>%
  arrange(desc(demographic)) %>%
  mutate(n = row_number()) %>%
  slice(1:5, (n()-4):n()) %>%
  mutate_at(vars(year, id), ~ sprintf("%d", .)) %>%
  mutate_at(vars(multiplier:political), ~ sprintf("%.2f", .)) %>%
  add_row(tpop = cf_base["beta:log1p(tpop)"],
          upop = cf_base["beta:log1p(upop)"],
          .after = 0) %>%
  transmute("~" = n,
            Country = statenme,
            Year = year,
            "MID\\#" = id,
            Demographic = demographic,
            "Total Pop." = tpop,
            "Urban Pop." = upop)

## Table of top and bottom geopolitical multipliers
tab_geo <- mult_all %>%
  arrange(desc(geopolitical), multiplier) %>%
  mutate(n = row_number()) %>%
  slice((n()-4):n()) %>%
  mutate_at(vars(year, id), ~ sprintf("%d", .)) %>%
  mutate_at(vars(multiplier:political), ~ sprintf("%.3f", .)) %>%
  add_row(distance = cf_base["beta:log1p(distance)"],
          nuclear = cf_base["beta:nuclear"],
          .after = 0) %>%
  transmute("~" = n,
            Country = statenme,
            Year = year,
            "MID\\#" = id,
            Geopolitical = geopolitical,
            Distance = distance,
            Nuclear = nuclear)

## Wrapper for default xtable options
print_xtab <- function(tab, file = "", hline = NULL, is_main = FALSE) {
    print(xtable(tab,
                 align = c("r", "r", "l", "l", "l", "c", "|", rep("c", ncol(tab) - 5))),
          file = file,
          include.rownames = FALSE,
          booktabs = TRUE,
          sanitize.text.function = identity,
          hline.after = c(-1, if (is_main) 0 else 1, nrow(tab), hline),
          floating = FALSE)
}

if (!dir.exists("tables"))
  dir.create("tables")
print_xtab(tab_mult, file = "tables/table_A3.tex", hline = 10, is_main = TRUE)
print_xtab(tab_demo, file = "tables/table_2a.tex", hline = 6)
print_xtab(tab_econ, file = "tables/table_2b.tex", hline = 6)
print_xtab(tab_geo, file = "tables/table_2c.tex")


## -----------------------------------------------------------------------------
## Multipliers for major powers over time
## -----------------------------------------------------------------------------

## Calculate multipliers for USA, UK, Russia, China, and Japan over time
data_maj <- imp_state_year %>%
        map(~ filter(., ccode %in% c(2, 200, 365, 710, 740))) %>%
        map(~ mutate(., distance = 0, id = year, sidea = NA))
mult_maj <- calc_mult(data_part = data_maj, fit = fit_base)

plot_data_maj <- mult_maj %>%
    select(statenme, year, multiplier, economic:political) %>%
    gather(key = multiplier, value = value, -statenme, -year) %>%
    mutate(multiplier = fct_recode(multiplier,
                                   "Overall" = "multiplier",
                                   "Economic" = "economic",
                                   "Demographic" = "demographic",
                                   "Geopolitical" = "geopolitical",
                                   "Political" = "political"),
           multiplier = fct_reorder(multiplier, value, var, .desc = TRUE),
           statenme = if_else(statenme == "United States of America", "United States", statenme))

plot_fm_maj <-
    ggplot(plot_data_maj, aes(x = year, y = value)) +
    geom_point(size = 0.25) +
    facet_grid(multiplier ~ statenme) +
    scale_x_continuous("Year") +
    scale_y_continuous("")

if (!dir.exists("figures"))
    dir.create("figures")
pdf(file = "figures/figure_A1.pdf", width = 8.5, height = 8.5)
print(plot_fm_maj)
dev.off()


## -----------------------------------------------------------------------------
## "ANOVA" table of multiplier components
## -----------------------------------------------------------------------------

tab_anova <- mult_all %>%
  mutate(id = paste0(id, "_", ccode)) %>%
  verify(!duplicated(id)) %>%
  select(id, total = multiplier, demographic, economic, political, geopolitical) %>%
  gather(key = "name", value = "value", -id) %>%
  group_by(id) %>%
  mutate(total = value[name == "total"]) %>%
  group_by(name) %>%
  summarise(sd = sd(value),
            explained = 100 * (var(total) - var(total / value)) / var(total),
            cor = cor(value, total, method = "spearman")) %>%
  slice(5, 1, 2, 4, 3) %>%
  mutate(name = paste0(toupper(str_sub(name, 1, 1)), str_sub(name, 2, -1)),
         sd = sprintf("%.2f", sd),
         explained = sprintf("%.1f", explained),
         cor = sprintf("%.2f", cor)) %>%
  mutate_all(~ str_replace(., "^-", "$-$")) %>%
  rename(Component = name, SD = sd, "\\%Explained" = explained, Correlation = cor)

print(xtable(tab_anova,
             align = c("l", "l", "r", "r", "r")),
      file = "tables/table_1.tex",
      include.rownames = FALSE,
      booktabs = TRUE,
      sanitize.text.function = identity,
      hline.after = c(-1, 0, 1, 5),
      floating = FALSE)


date()
